# Overview of the pitch-level data: every column with its type and the
# first few values.
trout_batter_data %>%
  glimpse()
#> Rows: 24,161
#> Columns: 92
#> $ pitch_type                      <chr> "CU", "FT", "FF", "CH", "FT", "FT", "C…
#> $ game_date                       <date> 2011-09-28, 2011-09-28, 2011-09-28, 2…
#> $ release_speed                   <dbl> 78.4, 91.0, 95.5, 81.8, 93.6, 93.2, 82…
#> $ release_pos_x                   <dbl> 2.06, 1.98, 1.92, 2.06, 1.62, 1.72, 2.…
#> $ release_pos_z                   <dbl> 6.33, 6.29, 6.20, 6.29, 6.20, 6.23, 6.…
#> $ player_name                     <chr> "Trout, Mike", "Trout, Mike", "Trout, …
#> $ batter                          <dbl> 545361, 545361, 545361, 545361, 545361…
#> $ pitcher                         <dbl> 457448, 457448, 457448, 457448, 457448…
#> $ events                          <chr> "single", NA, "strikeout", NA, NA, NA,…
#> $ description                     <chr> "hit_into_play", "called_strike", "swi…
#> $ spin_dir                        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ spin_rate_deprecated            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ break_angle_deprecated          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ break_length_deprecated         <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ zone                            <dbl> 7, 1, 5, 12, 13, 11, 12, 11, 8, 7, 12,…
#> $ des                             <chr> "Mike Trout singles on a sharp ground …
#> $ game_type                       <chr> "R", "R", "R", "R", "R", "R", "R", "R"…
#> $ stand                           <chr> "R", "R", "R", "R", "R", "R", "R", "R"…
#> $ p_throws                        <chr> "L", "L", "L", "L", "L", "L", "L", "L"…
#> $ home_team                       <chr> "LAA", "LAA", "LAA", "LAA", "LAA", "LA…
#> $ away_team                       <chr> "TEX", "TEX", "TEX", "TEX", "TEX", "TE…
#> $ type                            <chr> "X", "S", "S", "S", "S", "S", "X", "B"…
#> $ hit_location                    <dbl> 7, NA, 2, NA, NA, NA, 3, NA, NA, 7, NA…
#> $ bb_type                         <chr> "ground_ball", NA, NA, NA, NA, NA, "gr…
#> $ balls                           <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 1, 1,…
#> $ strikes                         <dbl> 1, 0, 2, 2, 1, 0, 1, 1, 0, 2, 2, 1, 0,…
#> $ game_year                       <dbl> 2011, 2011, 2011, 2011, 2011, 2011, 20…
#> $ pfx_x                           <dbl> 0.22, 1.19, 0.88, 1.63, 1.27, 1.07, 1.…
#> $ pfx_z                           <dbl> -0.53, 1.47, 1.48, 0.81, 1.55, 1.44, 1…
#> $ plate_x                         <dbl> -0.52, -0.58, 0.17, 1.17, -1.02, -0.95…
#> $ plate_z                         <dbl> 1.73, 3.33, 2.48, 2.59, 2.10, 2.78, 2.…
#> $ on_3b                           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ on_2b                           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ on_1b                           <dbl> 430947, 430947, 519068, 519068, 519068…
#> $ outs_when_up                    <dbl> 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0,…
#> $ inning                          <dbl> 5, 5, 3, 3, 3, 3, 1, 1, 1, 8, 8, 8, 8,…
#> $ inning_topbot                   <chr> "Bot", "Bot", "Bot", "Bot", "Bot", "Bo…
#> $ hc_x                            <dbl> 66.27, NA, NA, NA, NA, NA, 152.61, NA,…
#> $ hc_y                            <dbl> 110.44, NA, NA, NA, NA, NA, 166.67, NA…
#> $ tfs_deprecated                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ tfs_zulu_deprecated             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ fielder_2                       <dbl> 435063, 435063, 435063, 435063, 435063…
#> $ umpire                          <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ sv_id                           <chr> "110928_182624", "110928_182604", "110…
#> $ vx0                             <dbl> -6.54, -9.91, -7.37, -5.95, -10.58, -1…
#> $ vy0                             <dbl> -114.09, -132.14, -138.69, -118.93, -1…
#> $ vz0                             <dbl> -1.74, -4.61, -7.54, -3.16, -8.26, -6.…
#> $ ax                              <dbl> 3.27, 16.89, 14.12, 18.22, 18.78, 15.8…
#> $ ay                              <dbl> 20.91, 26.66, 30.06, 21.93, 29.47, 31.…
#> $ az                              <dbl> -38.81, -16.35, -14.64, -26.02, -14.59…
#> $ sz_top                          <dbl> 3.39, 3.39, 3.39, 3.39, 3.39, 3.39, 3.…
#> $ sz_bot                          <dbl> 1.56, 1.56, 1.56, 1.56, 1.56, 1.56, 1.…
#> $ hit_distance_sc                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ launch_speed                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ launch_angle                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ effective_speed                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ release_spin_rate               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ release_extension               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ game_pk                         <dbl> 289322, 289322, 289322, 289322, 289322…
#> $ pitcher_1                       <dbl> 457448, 457448, 457448, 457448, 457448…
#> $ fielder_2_1                     <dbl> 435063, 435063, 435063, 435063, 435063…
#> $ fielder_3                       <dbl> 519048, 519048, 519048, 519048, 519048…
#> $ fielder_4                       <dbl> 435079, 435079, 435079, 435079, 435079…
#> $ fielder_5                       <dbl> 134181, 134181, 134181, 134181, 134181…
#> $ fielder_6                       <dbl> 462101, 462101, 462101, 462101, 462101…
#> $ fielder_7                       <dbl> 461815, 461815, 461815, 461815, 461815…
#> $ fielder_8                       <dbl> 285078, 285078, 285078, 285078, 285078…
#> $ fielder_9                       <dbl> 443558, 443558, 443558, 443558, 443558…
#> $ release_pos_y                   <dbl> 50, 50, 50, 50, 50, 50, 50, 50, 50, 50…
#> $ estimated_ba_using_speedangle   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ estimated_woba_using_speedangle <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ woba_value                      <dbl> 0.90, NA, 0.00, NA, NA, NA, 0.00, NA, …
#> $ woba_denom                      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ babip_value                     <dbl> 1, NA, 0, NA, NA, NA, 0, NA, NA, 1, NA…
#> $ iso_value                       <dbl> 0, NA, 0, NA, NA, NA, 0, NA, NA, 0, NA…
#> $ launch_speed_angle              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ at_bat_number                   <dbl> 39, 39, 23, 23, 23, 23, 6, 6, 6, 60, 6…
#> $ pitch_number                    <dbl> 2, 1, 4, 3, 2, 1, 3, 2, 1, 5, 4, 3, 2,…
#> $ pitch_name                      <chr> "Curveball", "2-Seam Fastball", "4-Sea…
#> $ home_score                      <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,…
#> $ away_score                      <dbl> 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,…
#> $ bat_score                       <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,…
#> $ fld_score                       <dbl> 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,…
#> $ post_away_score                 <dbl> 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,…
#> $ post_home_score                 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,…
#> $ post_bat_score                  <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,…
#> $ post_fld_score                  <dbl> 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,…
#> $ if_fielding_alignment           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ of_fielding_alignment           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ spin_axis                       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ delta_home_win_exp              <dbl> 0.035, 0.000, -0.028, 0.000, 0.000, 0.…
#> $ delta_run_exp                   <dbl> 0.314, -0.035, -0.134, 0.000, -0.047, …

# Interesting factors:

# Scatter of hit coordinates (hc_x against negated hc_y), one panel per
# game_year, with points colored by bb_type. Rows that have no hc_x value
# are dropped before plotting; axis text, lines and titles are hidden.
trout_batter_data %>%
  filter(!is.na(hc_x)) %>%
  ggplot(mapping = aes(x = hc_x, y = -hc_y)) +
  geom_point(
    mapping = aes(color = bb_type),
    alpha = 0.5,
    size = 0.5
  ) +
  facet_wrap(vars(game_year)) +
  scale_color_brewer(palette = "Set1", type = "qual") +
  labs(title = "Mike Trout hits", color = "hit type") +
  coord_fixed() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.line = element_blank()
  )

# Same hit-coordinate scatter in a single panel, with point color mapped to
# game_year on the diverging "RdYlBu" palette. Axis decorations are hidden.
trout_batter_data %>%
  filter(!is.na(hc_x)) %>%
  ggplot(mapping = aes(x = hc_x, y = -hc_y)) +
  geom_point(
    mapping = aes(color = game_year),
    alpha = 0.5,
    size = 1
  ) +
  scale_color_distiller(palette = "RdYlBu", type = "div") +
  coord_fixed() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.line = element_blank()
  )

# Rectangle drawn over the plate_x / plate_z scatter plots as a strike-zone
# outline. The bounds are fixed constants (not taken from sz_top / sz_bot).
#
# Fix: the horizontal bounds were previously assigned the wrong way round
# (xmax = -1, xmin = 1). The outline that gets drawn is the same rectangle,
# but the variables now hold the values their names promise.
xmin <- -1
xmax <- 1
ymin <- 1.5
ymax <- 3.5

# Corners are listed in order around the rectangle and the first corner is
# repeated at the end so that geom_path() closes the outline.
strike_zone <- tibble(
  x = c(xmin, xmax, xmax, xmin, xmin),
  y = c(ymax, ymax, ymin, ymin, ymax)
)

# plate_x / plate_z locations of pitches whose description is "called_strike"
# or "ball", one panel per description, with the strike_zone outline drawn
# on top of the points in each panel.
trout_batter_data %>%
  filter(description %in% c("called_strike", "ball")) %>%
  ggplot(mapping = aes(x = plate_x, y = plate_z)) +
  geom_point(
    mapping = aes(color = description),
    alpha = 0.5,
    size = 0.5
  ) +
  geom_path(data = strike_zone, mapping = aes(x = x, y = y)) +
  facet_wrap(~description, nrow = 1) +
  scale_color_brewer(palette = "Dark2", type = "qual") +
  coord_fixed()
#> Warning: Removed 31 rows containing missing values (geom_point).

# Simplified pitch outcomes that are kept for the grid plot below.
keep_pitch_results <- c(
  "swinging_strike", "called_strike", "hit_into_play", "foul"
)

# Grid of plate_x / plate_z pitch locations: one row per pitch_type, one
# column per simplified outcome. Each panel shows the points, the
# strike_zone outline and 2-D density contours.
trout_batter_data %>%
  # Drop rows missing a location or a pitch type. Separate conditions are
  # combined elementwise; the previous scalar `||` only looked at the first
  # row on older R and is an error for length > 1 inputs since R 4.3.
  filter(!is.na(plate_x), !is.na(plate_z), !is.na(pitch_type)) %>%
  filter(!str_detect(description, "bunt")) %>%
  # Collapse outcome variants into the categories of keep_pitch_results.
  mutate(description = case_when(
    description == "swinging_strike_blocked" ~ "swinging_strike",
    str_detect(description, "foul") ~ "foul",
    TRUE ~ description
  )) %>%
  filter(description %in% keep_pitch_results) %>%
  # Replace every underscore for the facet labels; str_replace() only
  # changed the first one, leaving "hit into_play".
  mutate(description = str_replace_all(description, "_", " ")) %>%
  # Keep only pitch types with more than 100 remaining rows.
  group_by(pitch_type) %>%
  filter(n() > 100) %>%
  ungroup() %>%
  ggplot(aes(x = plate_x, y = plate_z)) +
  facet_grid(rows = vars(pitch_type), cols = vars(description)) +
  geom_point(aes(color = pitch_type), size = 0.4, alpha = 0.7) +
  geom_path(aes(x = x, y = y), data = strike_zone) +
  geom_density_2d() +
  scale_x_continuous(expand = expansion(mult = c(0, 0))) +
  scale_y_continuous(expand = expansion(mult = c(0, 0))) +
  coord_fixed() +
  theme(
    axis.line = element_blank()
  ) +
  # NOTE(review): the y aesthetic is plate_z but the label says "plate y" --
  # confirm whether the label is intentional.
  labs(x = "plate x", y = "plate y")